rm(list = ls())

## Load libraries

library(plm) # v.2.6-1
library(texreg) # v.1.38.6
library(tidyverse) # v.1.3.2
library(broom) # v.0.8.0
library(margins) # v.0.3.26
library(stargazer) # v.5.2.3

## Load data

load("working/bes_reduced.Rdata")

bes_reduced$OccHumanProx <- as.numeric(scale(bes_reduced$OccHumanProx))

bes_reduced$OccHumanProx_binned <- factor(as.numeric(bes_reduced$proximity_binned) * bes_reduced$pandemic + 1, labels = c("low", "low", "mid", "high"))

bes_reduced %>%
  filter(wave %in% c(14,15,18,20)) %>%
  group_by(wave) %>%
  summarise(rho = cor(redistSelf, taxSpendSelf, use=  "complete.obs"))

# define analysis function

analysis_func <- function(var, occ_unemployed_digit = 1){
  
  if(occ_unemployed_digit == 1) bes_reduced$occ_unemp <- bes_reduced$occ_unemp_1dig
  if(occ_unemployed_digit == 3) bes_reduced$occ_unemp <- bes_reduced$occ_unemp_3dig
  
  print(var)
  
  bes_reduced$outcome_scale <- as.numeric(scale(bes_reduced[[var]]))
  
  ## Model 0.1
  
  plm_out_0.1 <- lm(outcome_scale ~ proximity_binned + unemployed + occ_unemp + as.factor(wave) - 1,
                    data = bes_reduced)
  
  
  plm_out_0.1 %>% tidy(conf.int = TRUE) %>%
    mutate(term = factor(gsub("proximity_binned","Proximity to others: ", term),
                         levels = c("Proximity to others: low",
                                    "Proximity to others: mid",
                                    "Proximity to others: high"))) %>%
    filter(grepl("Proximity", term)) %>%
    ggplot(aes(x = estimate, y = term, xmin = conf.low, xmax = conf.high)) + geom_pointrange() +
    ylab("") + 
    xlab("Pooled estimate (no fixed effects)") + 
    theme_bw() + 
    ggtitle(var)
  if(occ_unemployed_digit == 1) ggsave(paste0("output/proximity_",var,"_pooled.png"), width = 7, height = 5)
  
  
  ## Model 1
  
  plm_out_1 <- plm(outcome_scale ~ OccHumanProx + unemployed + occ_unemp + as.factor(wave),
                   effect = "individual",
                   index = c("id"),
                   model = "within",
                   data = bes_reduced)
  
  vcov_1 <- vcov(plm_out_1)
  mfx_1 <- tidy(plm_out_1, conf.int = TRUE) %>% filter(term == "OccHumanProx")
  mfx_1$model <- 1
  
  ## Model 2
  
  plm_out_2 <- plm(outcome_scale ~ OccHumanProx * workHomeCoronaSelf_binary + unemployed + occ_unemp + as.factor(wave),
                   effect = "individual",
                   index = c("id"),
                   model = "within",
                   data = bes_reduced)
  
  vcov_2 <- vcov(plm_out_2)
  
  mfx_2_pe <- c(coef(plm_out_2)["OccHumanProx"],
                coef(plm_out_2)["OccHumanProx"] + coef(plm_out_2)["OccHumanProx:workHomeCoronaSelf_binary"])
  names(mfx_2_pe) <- c("OccHumanProx * WorkHome:No", "OccHumanProx * WorkHome:Yes")
  
  mfx_2_se <- c(sqrt(diag(vcov_2))["OccHumanProx"],
                sqrt(diag(vcov_2)["OccHumanProx"] + diag(vcov_2)["OccHumanProx:workHomeCoronaSelf_binary"] + (2*vcov_2["OccHumanProx","OccHumanProx:workHomeCoronaSelf_binary"])))
  
  names(mfx_2_se) <- names(mfx_2_pe)
  
  mfx_2 <- tibble(term = names(mfx_2_se),estimate = mfx_2_pe, std.error = mfx_2_se, conf.low = mfx_2_pe - 1.96*mfx_2_se, conf.high = mfx_2_pe + 1.96*mfx_2_se)
  mfx_2$model <- 2
  
  ## Model 3
  
  plm_out_3 <- plm(outcome_scale ~ factor(OccHumanProx_binned) + unemployed + occ_unemp + as.factor(wave),
                   effect = "individual",
                   index = c("id"),
                   model = "within",
                   data = bes_reduced)
  
  vcov_3 <- vcov(plm_out_3)
  
  mfx_3 <- tidy(plm_out_3, conf.int = TRUE) %>% 
    filter(grepl("OccHumanProx", term)) %>%
    mutate(term = c("OccHumanProx:Mid", "OccHumanProx:High"))
  mfx_3$model <- 3
  
  ## Model 4
  
  plm_out_4 <- plm(outcome_scale ~ factor(OccHumanProx_binned) * workHomeCoronaSelf_binary + unemployed + occ_unemp + as.factor(wave),
                   effect = "individual",
                   index = c("id"),
                   model = "within",
                   data = bes_reduced)
  
  vcov_4 <- vcov(plm_out_4)
  
  mfx_4_pe <- c(coef(plm_out_4)["factor(OccHumanProx_binned)mid"],
                coef(plm_out_4)["factor(OccHumanProx_binned)mid"] + coef(plm_out_4)["factor(OccHumanProx_binned)mid:workHomeCoronaSelf_binary"],
                
                coef(plm_out_4)["factor(OccHumanProx_binned)high"],
                coef(plm_out_4)["factor(OccHumanProx_binned)high"] + coef(plm_out_4)["factor(OccHumanProx_binned)high:workHomeCoronaSelf_binary"])
  names(mfx_4_pe) <- c("OccHumanProx:Mid * WorkHome:No", "OccHumanProx:Mid * WorkHome:Yes",
                       "OccHumanProx:High * WorkHome:No", "OccHumanProx:High * WorkHome:Yes")
  
  mfx_4_se <- c(sqrt(diag(vcov_4))["factor(OccHumanProx_binned)mid"],
                sqrt(diag(vcov_4)["factor(OccHumanProx_binned)mid"] + diag(vcov_4)["factor(OccHumanProx_binned)mid:workHomeCoronaSelf_binary"] + (2*vcov_4["factor(OccHumanProx_binned)mid","factor(OccHumanProx_binned)mid:workHomeCoronaSelf_binary"])),
                
                sqrt(diag(vcov_4))["factor(OccHumanProx_binned)high"],
                sqrt(diag(vcov_4)["factor(OccHumanProx_binned)high"] + diag(vcov_4)["factor(OccHumanProx_binned)high:workHomeCoronaSelf_binary"] + (2*vcov_4["factor(OccHumanProx_binned)high","factor(OccHumanProx_binned)high:workHomeCoronaSelf_binary"]))
  )
  
  names(mfx_4_se)  <- names(mfx_4_pe)
  
  mfx_4 <- tibble(term = names(mfx_4_se),estimate = mfx_4_pe, std.error = mfx_4_se, conf.low = mfx_4_pe - 1.96*mfx_4_se, conf.high = mfx_4_pe + 1.96*mfx_4_se)
  mfx_4$model <- 4
  
  mfx <- bind_rows(mfx_1, mfx_2, mfx_3, mfx_4) 
  
  mfx <- mfx %>% 
    mutate(term = gsub("OccHumanProx","OccProxRisk",term)) 
  
  mfx <- mfx %>%
    mutate(term = gsub("\\*", "x",term),
           term = factor(term, levels = c("OccProxRisk", 
                                          "OccProxRisk x WorkHome:No", 
                                          "OccProxRisk x WorkHome:Yes", 
                                          "OccProxRisk:Mid", 
                                          "OccProxRisk:High", 
                                          "OccProxRisk:Mid x WorkHome:No", 
                                          "OccProxRisk:Mid x WorkHome:Yes", 
                                          "OccProxRisk:High x WorkHome:No", 
                                          "OccProxRisk:High x WorkHome:Yes")),
           model = paste0("Model ", model))
  
  mfx %>% 
    ggplot(aes(x = estimate, y = term, group = model, xmin = conf.low, xmax = conf.high)) + geom_pointrange() + 
    facet_wrap(~model, scales = "free_y", ncol = 2) + 
    theme_bw() +
    geom_vline(xintercept = 0, linetype = 2) + 
    ggtitle(paste0("Treatment effects: ", var)) + 
    xlab("Estimate") + ylab("")
  if(occ_unemployed_digit == 1) ggsave(paste0("output/treatment_effects_",var,".png"), width = 10, height = 5)
  
  mfx %>% 
    filter(model != "Model 3") %>%
    mutate(model = gsub("Model 4", "Model 3", model)) %>%
    ggplot(aes(x = estimate, y = term, group = model, xmin = conf.low, xmax = conf.high)) + geom_pointrange() + 
    facet_wrap(~model, scales = "free_y", ncol = 3) + 
    theme_bw() +
    geom_vline(xintercept = 0, linetype = 2) + 
    ggtitle(paste0("Treatment effects: ", var)) + 
    xlab("Estimate") + ylab("")
  if(occ_unemployed_digit == 1) ggsave(paste0("output/treatment_effects_paper_",var,".png"), width = 10, height = 3)
  
  sink(paste0("output/",var,"_fe_models.txt"))
  cat(screenreg(list(plm_out_1, plm_out_2, plm_out_3, plm_out_4)))
  sink()
  
  plm_out_leads <- plm(outcome_scale ~ proximity * as.factor(wave) + unemployed + occ_unemp,
                       effect = "individual",
                       index = c("id"),
                       model = "within",
                       data = bes_reduced)
  
  tidy(plm_out_leads, conf.int = TRUE) %>%
    filter(grepl("proximity", term)) %>%
    mutate(term = as.numeric(gsub("\\D+","", term))) %>%
    ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) + 
    geom_pointrange() + 
    theme_bw() + 
    xlab("Survey wave") + 
    ylab("Marginal effect of occupational proximity risk") + 
    geom_vline(xintercept = 19.5, linetype = 2) + 
    geom_hline(yintercept = 0, linetype = 3) + 
    ggtitle(paste0("Parallel trends: ",var))
  if(occ_unemployed_digit == 1) ggsave(paste0("output/parallel_trends_",var,".png"), width = 8, height = 4)
  
  bes_reduced_tmp <- bes_reduced %>%
    mutate(outcome_scale = as.numeric((get(var)))) %>%
    mutate(date = case_when(wave == 14 ~ "2018-05-14",
                            wave == 15 ~ "2019-03-11",
                            wave == 16 ~ "2019-05-24",
                            wave == 17 ~ "2019-11-01",
                            wave == 18 ~ "2019-11-13",
                            wave == 19 ~ "2019-12-13",
                            wave == 20 ~ "2020-06-03"),
           date = as.Date(date))
  
  ylab_title <- var
  
  bes_reduced_tmp %>%
    group_by(proximity_binned, date) %>%
    summarise(taxSpendSelf_est = mean(outcome_scale, na.rm = TRUE),
              taxSpendSelf_lo = ifelse(sum(!is.na(outcome_scale)) > 0, t.test(outcome_scale)$conf[1], NA),
              taxSpendSelf_high = ifelse(sum(!is.na(outcome_scale)) > 0, t.test(outcome_scale)$conf[2], NA)) %>%
    filter(!is.na(proximity_binned) & !is.na(taxSpendSelf_est)) %>% 
    ungroup() %>%
    ggplot(aes(x = date, y = taxSpendSelf_est, ymin = taxSpendSelf_lo, ymax = taxSpendSelf_high, col = proximity_binned)) + 
    geom_pointrange(position = position_dodge(.1)) + 
    geom_line() + 
    theme_bw() + 
    theme(legend.position="bottom") +
    scale_color_manual("OccProximityRisk", values = c("black", "red", "blue"), labels = c("Low", "Mid", "High")) + 
    ylab(ylab_title) + xlab("Survey date") + 
    scale_x_date(breaks = as.Date(c(unique(bes_reduced_tmp$date[!is.na(bes_reduced_tmp[[var]])]),"2020-03-26")), date_labels = "%b-%Y") + 
    geom_vline(xintercept = as.Date("2020-03-26"), linetype = 2)
    
  ggsave(paste0("output/parallel_trends_2_",var,".png"), width = 9, height = 4)
  
  out <- list(plm_out_0.1, plm_out_1, plm_out_2, plm_out_3, plm_out_4)
  return(out)
  
}

dvs <- c("redistSelf", "taxSpendSelf", "riskUnemployment", "riskPoverty")
models_out_3dig <- lapply(dvs, function(x) analysis_func(x, occ_unemployed_digit = 3))
models_out_1dig <- lapply(dvs, function(x) analysis_func(x, occ_unemployed_digit = 1))
names(models_out_3dig) <- names(models_out_1dig) <- dvs

# Outputs

## 1. Regression tables for taxSpendSelf and redistSelf (1 digit occupational unemployment)

sink("output/taxSpendSelf_regressions.tex")
stargazer(models_out_1dig$taxSpendSelf[c(2,3,5)], 
          keep = c("OccHumanProx","unemployed","occ_unemp"),
          keep.stat = c("n"),
          no.space = TRUE,
          covariate.labels = c("OccProximityRisk",
                               "OccProximityRisk: Mid",
                               "OccProximityRisk: High",
                               "Unemployed",
                               "Occ. unemployment rate",
                               "OccProximityRisk * workHome",
                               "OccProximityRisk: Mid * workHome",
                               "OccProximityRisk: High * workHome"
                               ),
          float = FALSE,
          dep.var.caption = "",
          dep.var.labels = "taxSpendSelf",
          add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
                           c("Wave FEs", "Yes", "Yes", "Yes")))
sink()

sink("output/redistSelf_regressions.tex")
stargazer(models_out_1dig$redistSelf[c(2,3,5)], 
          keep = c("OccHumanProx","unemployed","occ_unemp"),
          keep.stat = c("n"),
          no.space = TRUE,
          covariate.labels = c("OccProximityRisk",
                               "OccProximityRisk: Mid",
                               "OccProximityRisk: High",
                               "Unemployed",
                               "Occ. unemployment rate",
                               "OccProximityRisk * workHome",
                               "OccProximityRisk: Mid * workHome",
                               "OccProximityRisk: High * workHome"
          ),
          float = FALSE,
          dep.var.caption = "",
          dep.var.labels = "redistSelf",
          add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
                           c("Wave FEs", "Yes", "Yes", "Yes")))
sink()

## 2. Regression tables for taxSpendSelf and redistSelf (3 digit occupational unemployment)

sink("output/taxSpendSelf_regressions_3dig.tex")
stargazer(models_out_3dig$taxSpendSelf[c(2,3,5)], 
          keep = c("OccHumanProx","unemployed","occ_unemp"),
          keep.stat = c("n"),
          no.space = TRUE,
          covariate.labels = c("OccProximityRisk",
                               "OccProximityRisk: Mid",
                               "OccProximityRisk: High",
                               "Unemployed",
                               "Occ. unemployment rate",
                               "OccProximityRisk * workHome",
                               "OccProximityRisk: Mid * workHome",
                               "OccProximityRisk: High * workHome"
          ),
          float = FALSE,
          dep.var.caption = "",
          dep.var.labels = "taxSpendSelf",
          add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
                           c("Wave FEs", "Yes", "Yes", "Yes")))
sink()

sink("output/redistSelf_regressions_3dig.tex")
stargazer(models_out_3dig$redistSelf[c(2,3,5)], 
          keep = c("OccHumanProx","unemployed","occ_unemp"),
          keep.stat = c("n"),
          no.space = TRUE,
          covariate.labels = c("OccProximityRisk",
                               "OccProximityRisk: Mid",
                               "OccProximityRisk: High",
                               "Unemployed",
                               "Occ. unemployment rate",
                               "OccProximityRisk * workHome",
                               "OccProximityRisk: Mid * workHome",
                               "OccProximityRisk: High * workHome"
          ),
          float = FALSE,
          dep.var.caption = "",
          dep.var.labels = "redistSelf",
          add.lines = list(c("Individual FEs", "Yes", "Yes", "Yes"),
                           c("Wave FEs", "Yes", "Yes", "Yes")))
sink()

# Descriptives

## Does proximity predict subjective COVID experiences?

cov_attitudes <- c("worryCoronaHealth", "dutyCorona", "coronaDied", "hadCovidSelf", "cvFreedomSelf", "cvEconSelf")

lapply(cov_attitudes, function(x) {
  
  bes_reduced_tmp <- bes_reduced[bes_reduced$wave == 20,]
  
  bes_reduced_tmp$outcome_tmp <- as.numeric(scale(bes_reduced_tmp[[x]]))
  tmp <- tidy(lm(outcome_tmp ~ scale(OccHumanProx), bes_reduced_tmp), conf.int = TRUE)
  tmp$outcome <- x
  tmp$controls <- FALSE
  
  tmp2 <- tidy(lm(outcome_tmp ~ scale(OccHumanProx) + educ + region + income + age + I(age^2) + housing, bes_reduced_tmp), conf.int = TRUE)
  tmp2$outcome <- x
  tmp2$controls <- TRUE
  
  
  rbind(tmp, tmp2)
}) %>%
  bind_rows() %>%
  filter(grepl("OccHumanProx",term)) %>%
  mutate(controls = ifelse(controls, "...with controls", "...without controls")) %>%
  mutate(outcome = case_when(outcome == "worryCoronaHealth" ~ "Worried about catching COVID",
                             outcome == "hadCovidSelf" ~ "Reports having had COVID",
                             outcome == "dutyCorona" ~ "There is a duty to follow COVID rules",
                             outcome == "cvFreedomSelf" ~ "Restrict personal freedoms to fight infections",
                             outcome == "cvEconSelf" ~ "Reduce infections even if it damages the economy",
                             outcome == "coronaDied" ~ "Personally knows someone who died from COVID"),
         outcome = fct_reorder(outcome, estimate)) %>%
  ggplot(aes(x = estimate, y = outcome, xmin = conf.low, xmax = conf.high, col = controls, group = controls)) + geom_pointrange(position = position_dodge(width = .2)) + 
  #facet_wrap(~outcome, ncol = 4) + 
  theme_bw() + 
  geom_vline(xintercept = 0, linetype = 2) + 
  xlab("Estimate") + 
  ylab("") + 
  ggtitle("Proximity risk and COVID experiences/attitudes") + 
  scale_color_manual("Effect of proximity risk...", values= c("black", "gray")) 

ggsave("output/proximity_subjective_validation.png", width = 8, height = 4.5)

## Does proximity risk predict COVID death rates?

exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
  transmute(soc2010 = `UK SOC 2010 Code`,
            proximity = `Proximity to others`,
            proximity_binned = factor(cut(proximity, 3), labels = c("low","mid","high")))

death_rates <- readxl::read_excel("data/covid_death_rates.xlsx", sheet = 11, skip = 16) %>% 
  filter(!is.na(`Deaths involving COVID-19...4`)) %>%
  mutate(total_covid_deaths = `Deaths involving COVID-19...4` + `Deaths involving COVID-19...8`,
         total_deaths = `Average all cause mortality (2015 to 2019)...6` + `Average all cause mortality (2015 to 2019)...10`,#`All causes of death...5` + `All causes of death...9`,
         covid_death_rate = total_covid_deaths/total_deaths,
         soc2010 = `...1`) %>%
  select(soc2010, total_covid_deaths, total_deaths, covid_death_rate)

death_rates <- full_join(exposure, death_rates) 
death_rates <- death_rates %>% filter(covid_death_rate != Inf)

pred_deaths <- predict(lm(covid_death_rate ~ proximity, death_rates, weights = total_deaths), 
                       newdata = data.frame(proximity = quantile(death_rates$proximity, c(.10, .90), na.rm = TRUE)))

diff(pred_deaths)/pred_deaths[1]

death_rates %>% 
  ggplot(aes(x = proximity, y = covid_death_rate, size = total_deaths/20)) + 
  geom_point(alpha = .2) + 
  scale_size_continuous(guide = FALSE) +
  geom_smooth(method = "lm", mapping = aes(weight = total_deaths/2), col = "black", size = 1.5) + 
  xlab("Occupational proximity risk") + 
  ylab("Occupational COVID death rate") + 
  ggtitle("Proximity risk and COVID death rate") + 
  theme_bw()

ggsave("output/proximity_objective_validation.png", width = 6, height = 5)


## Which are the highest and lowest proximity occupations?

exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
  transmute(occ_title = `Occupation title`,
            soc2010 = `UK SOC 2010 Code`,
            proximity = `Proximity to others`,
            proximity_binned = factor(cut(proximity, 3), labels = c("low","mid","high")))

high_risk <- exposure[order(exposure$proximity, decreasing = T),][1:10,]
low_risk <- exposure[order(exposure$proximity, decreasing = F),][1:10,]


out <- data.frame(Occupation = c(high_risk$occ_title, "...", low_risk$occ_title[nrow(low_risk):1]),
                  OccProxRisk = c(high_risk$proximity, "...",low_risk$proximity[nrow(low_risk):1]))

colnames(out) <- c("Occupation", "$OccProxRisk$")

library(xtable) # v.1.8-4

sink("output/risky_occupations.tex")
print.xtable(xtable(out), include.rownames = FALSE, floating = FALSE, sanitize.colnames.function = function(x) {x})
sink()

## What fraction of each occupational category work from home?

wfh_prox <- bes_reduced %>%
  filter(wave == 20) %>%
  group_by(soc2010) %>%
  summarise(wfh = mean(workHomeCoronaSelf=="Yes", na.rm = TRUE)) %>%
  full_join(exposure, wfh_prox, by = "soc2010") %>%
  mutate(wfh_prox = proximity * (1-wfh))

high_risk <- wfh_prox[order(wfh_prox$wfh_prox, decreasing = T),][1:10,]
low_risk <- wfh_prox[order(wfh_prox$wfh_prox, decreasing = F),][1:10,]

wfh_prox %>%
  ggplot(aes(x = proximity, y = wfh, label = occ_title, size = wfh*10)) + 
  geom_point(alpha = .2) +
  geom_text() +
  theme_bw() + 
  scale_size_continuous(guide = FALSE) + 
  xlab("Occupational proximity risk") + 
  ylab("Fraction working from home due to COVID") + 
  scale_size_identity(trans="sqrt",guide=FALSE) + 
  ggtitle("Occupational proximity risk and proportion working from home")

ggsave("output/proximity_by_wfh.png", width = 10, height = 7)


## Plot of occupational unemployment rate vs occupational health risk

exposure <- read_csv("data/occupations_exposure_to_disease_data.csv") %>%
  transmute(occ_title = `Occupation title`,
            soc2010 = `UK SOC 2010 Code`,
            proximity = `Proximity to others`,
            proximity_binned = factor(cut(proximity, 3), labels = c("low","mid","high")))

## 3 digit

occ_rates_min <- readstata13::read.dta13("data/LFS/ourmin_combined.dta")
obs_min <- readstata13::read.dta13("data/LFS/obsmin.dta")

obs_min <- tibble(obs_min) %>%
  group_by(soc_minor) %>%
  summarise(obs_aprjun18 = sum(obs_aprjun18),
            obs_janmar19 = sum(obs_janmar19),
            obs_octdec19 = sum(obs_octdec19),
            obs_aprjun20 = sum(obs_aprjun20))

# Add number of observations in each category

occ_rates_min <- tibble(full_join(occ_rates_min, obs_min, by = "soc_minor"))

to_plot <- exposure %>% mutate(soc_minor = substring(soc2010,1,3)) %>%
  group_by(soc_minor) %>%
  summarize(proximity = mean(proximity)) %>%
  mutate(soc_minor = as.numeric(soc_minor)) %>%
  full_join(occ_rates_min,
            by = "soc_minor") %>%
  mutate(our_diff = our_aprjun20 - our_octdec19,
         n = (obs_octdec19 + obs_aprjun20)/2) 

to_plot %>%
  ggplot(aes(y = proximity, x = our_octdec19, size = sqrt(obs_octdec19), weight = obs_octdec19)) + 
  geom_point(col = alpha("black", .4)) + 
  theme_bw() + 
  ylab("Occupational proximity risk") + 
  xlab("Occupational unemployment rate") + 
  geom_smooth(method = "lm", col = "black") + 
  guides(size = "none") + 
  ylim(c(0,100))

ggsave(paste0("output/proximity_v_pre_crisis_oup.png"), width = 5, height = 4)

to_plot %>%
  ggplot(aes(y = proximity, x = our_diff, size = sqrt(n), weight = n)) + geom_point(col = alpha("black", .4)) + 
  theme_bw() + 
  ylab("Occupational proximity risk") + 
  xlab("Change in occupational unemployment rate") + 
  geom_smooth(method = "lm", col = "black") + 
  guides(size = "none") + 
  ylim(c(0,100))

ggsave(paste0("output/proximity_v_change_in_oup.png"), width = 5, height = 4)

## Aggregate distribution of taxSpendSelf across survey waves 

tibble(reshape2::melt(prop.table(table(bes_reduced$wave,bes_reduced$taxSpendSelf),1))) %>%
  filter(!is.na(value)) %>%
  mutate(wave = as.character(Var1),
         Var2 = factor(Var2, levels = 1:11)) %>%
  ggplot(aes(x=Var2, y = value, fill = wave)) + 
  geom_bar(position = "dodge", stat = "identity") + 
  theme_bw() + 
  ylab("Proportion of respondents") + 
  xlab("taxSpendSelf") + 
  scale_fill_manual("Wave", values = c(alpha("black", .2), alpha("black", .4), alpha("black", .6), "black"))

ggsave("output/taxSpendSelf_distribution_by_wave.png", width = 8, height = 4)

## Is there any differential polarization/convergence within proximity-risk groups over time?

sd_est <- bes_reduced %>%
  group_by(wave, proximity_binned) %>%
  summarise(sd_taxSpendSelf = sd(taxSpendSelf, na.rm = TRUE),
            .groups = "drop") 

do_boot <- function(){
  x <- bes_reduced %>%
  slice_sample(prop = 1, replace = TRUE) %>%
  group_by(wave, proximity_binned) %>%
  summarise(sd_taxSpendSelf = sd(taxSpendSelf, na.rm = TRUE),
            .groups = "drop")
  x$sd_taxSpendSelf
}

boot_out <- replicate(1000, do_boot())

boot_low <- apply(boot_out,1,quantile, 0.025, na.rm = TRUE)
boot_hi <- apply(boot_out,1,quantile, 0.975, na.rm = TRUE)

sd_est$est <- sd_est$sd_taxSpendSelf
sd_est$lo <- boot_low
sd_est$hi <- boot_hi

sd_est %>%
  mutate(date = case_when(wave == 14 ~ "2018-05-14",
                          wave == 15 ~ "2019-03-11",
                          wave == 16 ~ "2019-05-24",
                          wave == 17 ~ "2019-11-01",
                          wave == 18 ~ "2019-11-13",
                          wave == 19 ~ "2019-12-13",
                          wave == 20 ~ "2020-06-03"),
         date = as.Date(date)) %>%
  filter(!is.na(proximity_binned) & !is.na(sd_taxSpendSelf)) %>%
  ggplot(aes(x = date, y = est, 
             ymin = lo, ymax = hi,
             col = proximity_binned,
             group = proximity_binned)) + 
  geom_pointrange(position = position_dodge(15)) + geom_line() + 
  theme_bw() + 
  xlab("Wave") + 
  ylab("sd(taxSpendSelf)") + 
  ylim(c(1,3)) +
  scale_color_manual("Occupational proximity risk", values = c("gray", "black", "red")) + 
  scale_x_date(breaks = as.Date(c("2018-05-14", "2019-03-11", "2019-11-13", "2020-03-26", "2020-06-03")), date_labels = "%b-%Y") +
  theme(legend.position = "bottom") + 
  geom_vline(xintercept = as.Date("2020-03-26"), linetype = 2)

ggsave("output/taxSpendSelf_polarization_by_wave_by_group.png", width = 8, height = 4)
